acs_vars_2018_5yr <-
listCensusMetadata(
name = "2018/acs/acs5",
type = "variables"
)
acs_vars_2019_1yr <-
listCensusMetadata(
name = "2019/acs/acs1",
type = "variables"
)
census_race_labels = c("White Alone",
"Black or African American",
"American Indian and Alaska Native Alone",
"Asian Alone",
"Native Hawaiian and Other Pacific Islander Alone)",
"Some Other Race Alone",
"Two or More Races")
bay_educ_race = 1:7 %>%
map_dfr(function(x) {
getCensus(name = "acs/acs1",
vintage = 2019,
region = "county:085",
regionin = "state:06",
vars = paste0("group(B15002", LETTERS[x], ")")) %>%
select(!c(GEO_ID, state, NAME) &
!ends_with(c("EA", "MA", "M"))) %>%
pivot_longer(ends_with("E"),
names_to = "variable",
values_to = "estimate") %>%
left_join(acs_vars_2019_1yr %>% select(name, label),
by = c("variable" = "name")) %>%
select(-variable) %>%
separate(label,
into = c(NA, NA, "sex", "education"),
sep = "!!") %>%
filter(!is.na(education)) %>%
mutate(race = census_race_labels[x])
})
bay_educ_race
bay_race_total = bay_educ_race %>%
group_by(race) %>%
summarize(estimate = sum(estimate)) %>%
mutate(education = "Total")
bay_educ_race %>% group_by(education, race) %>%
summarize(estimate = sum(estimate)) %>%
rbind(bay_race_total) %>%
ggplot() +
geom_bar(aes(x = education %>%
factor(levels = rev(c("Total",
bay_educ_race$education[1:8]))),
y = estimate,
fill = race),
stat = "identity",
position = "fill") +
labs(x = "Educational attainment",
y = "Proportion of households",
title = "Santa Clara County educational attainment by race",
subtitle = "Population 25 years or older",
fill = "Race of householder") +
coord_flip() +
theme(legend.position = "bottom",
legend.direction = "vertical")
We can observe some more interesting phenomena by flipping our analysis, that is, looking at the composition of educational attainment for every race.
bay_educ_race %>% group_by(education, race) %>%
summarize(estimate = sum(estimate)) %>%
ggplot() +
geom_bar(aes(x = race,
y = estimate,
fill = education %>%
factor(levels = rev(c("Total",
bay_educ_race$education[1:8])))),
stat = "identity",
position = "fill") +
labs(x = "Proportion of households",
y = "Educational attainment",
title = "Santa Clara County race by educational attainment",
subtitle = "Population 25 years or older",
fill = "Educational attainment") +
coord_flip() +
theme(legend.position = "bottom",
legend.direction = "vertical")
These statistics testify to the racial stratification in Santa Clara County jobs. In the tech sector in particular, whites and Asians are overrepresented, while across all professions requiring a college degree, whites are overrepresented.
ca_pums = get_pums(variables = c("PUMA", "ACCESS", "SCHG"),
state = "CA",
year = 2019,
survey = "acs1",
recode = T)
saveRDS(ca_pums, file = "ca_pums.rds")
pums_vars_2019 = pums_variables %>%
filter(year == 2019, survey == "acs1")
pums_vars_2019_inds = pums_vars_2019 %>%
distinct(var_code, var_label, data_type, level) %>%
filter(level == "person")
ca_pums = readRDS("ca_pums.rds")
ca_pumas = pumas("CA", cb = T, progress_bar = F)
# Isolate Santa Clara County
ca_counties = counties("CA", cb = T, progress_bar = F)
county_name = c("Santa Clara")
sc_county = ca_counties %>%
filter(NAME %in% county_name)
sc_pumas = ca_pumas %>%
st_centroid() %>%
.[sc_county, ] %>%
mutate(PUMACE10 = as.numeric(PUMACE10)) %>%
st_set_geometry(NULL) %>%
left_join(ca_pumas %>% select(GEOID10)) %>%
st_as_sf()
sc_pums = ca_pums %>%
mutate(PUMA = as.numeric(PUMA)) %>%
filter(PUMA %in% sc_pumas$PUMACE10)
# Generate number of K-12 students without internet
sc_internet = sc_pums %>%
filter(ACCESS != "b" &
SCHG != "bb") %>%
mutate(ACCESS = as.numeric(ACCESS),
SCHG = as.numeric(SCHG)) %>%
filter(!duplicated(SERIALNO) &
(SCHG >= 2 &
SCHG <= 14)) %>%
mutate(no_internet = ifelse(
(ACCESS == 3),
PWGTP,
0)) %>%
group_by(PUMA) %>%
left_join(sc_pumas %>% select(PUMACE10),
by = c("PUMA" = "PUMACE10"))
# Print number
sum(sc_internet$no_internet, na.rm = T)
## [1] 2752
# Total children being tracked
sum(sc_internet$PWGTP, na.rm = T)
## [1] 163553
# Percentage of children
sum(sc_internet$no_internet, na.rm = T) /
(sum(sc_internet$PWGTP, na.rm = T))
## [1] 0.01682635
According to the ACS 2018 5-year results, 1.9% of K-12 students in Santa Clara County (35 out of 1835) do not have Internet access. There are a few important caveats to note about the data.
First, in the ACS 2018 and 2019 1-year results, when I ran this same code I ended up with 0 students without Internet access, which I highly doubt is the case — it’s more likely that I got that result because the 1-year samples only draw from highly populated areas, and in Santa Clara County the people least likely to have internet access will likely live in sparsely populated areas not covered by the 1-year sample. This issue suggests that less populated areas that are less likely to have internet access are also less likely to be sampled, and thus that this percentage is probably an underestimate.
A related problem is that the sample of K-12 children is very tiny to begin with. Estimates made from small samples are bound to have a large margin of error: another reason to take this estimate with a grain of salt.
Finally, the 2018 5-year data might not be very useful for understanding the state of internet access in 2020. The 2018 5-year data by definition draws on a sample from 2013-18, and internet access has broadened greatly even just between 2018 and 2020, let alone from 2013 to 2020.
# Broaden PUMS data to all Bay Area
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties = ca_counties %>%
filter(NAME %in% bay_county_names)
bay_pumas = ca_pumas %>%
st_centroid() %>%
.[bay_counties, ] %>%
mutate(PUMACE10 = as.numeric(PUMACE10)) %>%
st_set_geometry(NULL) %>%
left_join(ca_pumas %>% select(GEOID10)) %>%
st_as_sf()
bay_pums = ca_pums %>%
mutate(PUMA = as.numeric(PUMA)) %>%
filter(PUMA %in% bay_pumas$PUMACE10)
# Generate number of K-12 students without internet
bay_internet = bay_pums %>%
filter(ACCESS != "b" &
SCHG != "bb") %>%
mutate(ACCESS = as.numeric(ACCESS),
SCHG = as.numeric(SCHG)) %>%
filter(!duplicated(SERIALNO) &
(SCHG >= 2 &
SCHG <= 14)) %>%
mutate(no_internet = ifelse(
(ACCESS == 3),
PWGTP,
0)) %>%
group_by(PUMA) %>%
mutate(perc_nointernet = sum(no_internet, na.rm = T)/
(sum(PWGTP, na.rm = T))*100) %>%
left_join(bay_pumas %>% select(PUMACE10),
by = c("PUMA" = "PUMACE10")) %>%
st_as_sf()
pums_pal = colorNumeric(palette = "Oranges",
domain = bay_internet$perc_nointernet)
leaflet() %>%
addTiles() %>%
addPolygons(data = bay_internet,
fillColor = ~pums_pal(perc_nointernet),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(round(perc_nointernet),
"% K-12 students without Internet access"),
highlightOptions = highlightOptions(weight = 2, opacity = 1)) %>%
addLegend(data = bay_internet,
pal = pums_pal,
values = ~perc_nointernet,
title = "% K-12 students<br>without Internet access")
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'